library(rJava)
library(qCBA)
library(arc)
.jinit()
[1] 0

Data preparation

data <- read.csv("../data/breast-w0.csv")
data[1:5,]
smp_size <- floor(1 * nrow(data))
set.seed(123)
train_ind <- sample(seq_len(nrow(data)), size = smp_size)
train <- data[train_ind, ]
test <- data[train_ind, ]

Model training

CBA

rmCBA <- cba(train, classAtt=colnames(data)[length(colnames(data))])
Using automatic threshold detection
Running apriori with setting: confidence =  0.5 , support =  0 , minlen =  2 , maxlen =  3 , MAX_RULE_LEN =  10
Rule count:  483  Iteration:  1
Increasing maxlen to:   4
Running apriori with setting: confidence =  0.5 , support =  0 , minlen =  2 , maxlen =  4 , MAX_RULE_LEN =  10
Rule count:  5493  Iteration:  2
Target rule count satisfied:   1000
Removing excess discovered rules
Rule learning took: 0.15  seconds
Original rules:  1000
Rules after data coverage pruning: 47
Performing default rule pruning.
Final rule list size:  42
Pruning took: 0.83  seconds
inspect(rmCBA@rules[1:5])
    lhs                                    rhs              support confidence     lift count lhs_length
[1] {Cell_Size_Uniformity=-inf_to_1.5,                                                                  
     Normal_Nucleoli=-inf_to_2.5}       => {Class=benign} 0.5350318          1 1.524272   336          2
[2] {Bare_Nuclei=-inf_to_1.5,                                                                           
     Single_Epi_Cell_Size=-inf_to_2.5}  => {Class=benign} 0.5159236          1 1.524272   324          2
[3] {Cell_Shape_Uniformity=-inf_to_1.5,                                                                 
     Normal_Nucleoli=-inf_to_2.5}       => {Class=benign} 0.4920382          1 1.524272   309          2
[4] {Cell_Size_Uniformity=-inf_to_1.5,                                                                  
     Bare_Nuclei=-inf_to_1.5}           => {Class=benign} 0.4856688          1 1.524272   305          2
[5] {Marginal_Adhesion=-inf_to_1.5,                                                                     
     Clump_Thickness=-inf_to_4.5}       => {Class=benign} 0.4347134          1 1.524272   273          2

QCBA

Data structure conversion

Conversion to arules

itemMatrixRules <- as.item.matrix(rmqCBA, train)
inspect(itemMatrixRules[1:5,])
    lhs                                    rhs              support confidence     lift count lhs_length
[1] {Cell_Size_Uniformity=-inf_to_1.5,                                                                  
     Normal_Nucleoli=-inf_to_2.5}       => {Class=benign} 0.5350319          1 1.524272   336          2
[2] {Bare_Nuclei=-inf_to_1.5,                                                                           
     Single_Epi_Cell_Size=-inf_to_2.5}  => {Class=benign} 0.5159236          1 1.524272   323          2
[3] {Cell_Shape_Uniformity=-inf_to_1.5,                                                                 
     Normal_Nucleoli=-inf_to_2.5}       => {Class=benign} 0.4920382          1 1.524272   309          2
[4] {Bare_Nuclei=-inf_to_1.5,                                                                           
     Cell_Size_Uniformity=-inf_to_1.5}  => {Class=benign} 0.4856688          1 1.524272   304          2
[5] {Clump_Thickness=-inf_to_4.5,                                                                       
     Marginal_Adhesion=-inf_to_1.5}     => {Class=benign} 0.4347134          1 1.524272   272          2

Conversion to qcba data structure

qcbaRules <- as.qcba.rules(itemMatrixRules)
qcbaRules[1:10,]

Overwriting the QCBA object slot with the new rules and converting back to arules itemMatrix.

rmqCBA@rules <- qcbaRules
itemMatrixRules2 <- as.item.matrix(rmqCBA, train)
inspect(itemMatrixRules2[1:10])
     lhs                                    rhs               support confidence     lift count lhs_length
[1]  {Cell_Size_Uniformity=-inf_to_1.5,                                                                   
      Normal_Nucleoli=-inf_to_2.5}       => {Class=benign} 0.53503186          1 1.524272   336          2
[2]  {Bare_Nuclei=-inf_to_1.5,                                                                            
      Single_Epi_Cell_Size=-inf_to_2.5}  => {Class=benign} 0.51592356          1 1.524272   323          2
[3]  {Cell_Shape_Uniformity=-inf_to_1.5,                                                                  
      Normal_Nucleoli=-inf_to_2.5}       => {Class=benign} 0.49203822          1 1.524272   309          2
[4]  {Bare_Nuclei=-inf_to_1.5,                                                                            
      Cell_Size_Uniformity=-inf_to_1.5}  => {Class=benign} 0.48566878          1 1.524272   304          2
[5]  {Clump_Thickness=-inf_to_4.5,                                                                        
      Marginal_Adhesion=-inf_to_1.5}     => {Class=benign} 0.43471336          1 1.524272   272          2
[6]  {Cell_Shape_Uniformity=-inf_to_1.5,                                                                  
      Clump_Thickness=-inf_to_4.5}       => {Class=benign} 0.41401273          1 1.524272   259          2
[7]  {Bland_Chromatin=-inf_to_2.5,                                                                        
      Cell_Size_Uniformity=-inf_to_1.5}  => {Class=benign} 0.38216561          1 1.524272   240          2
[8]  {Bland_Chromatin=2.5_to_3.5,                                                                         
      Cell_Size_Uniformity=-inf_to_1.5}  => {Class=benign} 0.15764332          1 1.524272    99          2
[9]  {Bland_Chromatin=-inf_to_2.5,                                                                        
      Clump_Thickness=4.5_to_6.5}        => {Class=benign} 0.09235669          1 1.524272    58          2
[10] {Bare_Nuclei=-inf_to_1.5,                                                                            
      Cell_Shape_Uniformity=1.5_to_2.5}  => {Class=benign} 0.05891720          1 1.524272    37          2

Arules packages interoperability

plotly_arules(itemMatrixRules)
'plotly_arules' is deprecated.
Use 'plot' instead.
See help("Deprecated")
inspectDT(itemMatrixRules2)

Explanations

cbaFiringRuleIDs <- explainPrediction.CBARuleModel(rmCBA, train)
dimnames(.) <- NULL:  translated to 
dimnames(.) <- list(NULL,NULL)  <==>  unname(.)
cbaFiringRules <- as.qcba.rules(rmCBA@rules)[cbaFiringRuleIDs,]
# explanation demo
firingRuleIDs <- predict(rmqCBA,test,outputFiringRuleIDs=TRUE)
firingRules <- rmqCBA@rules[firingRuleIDs,]
ir <- new("intervalReader",
          numberSeparator = "_to_",
          negativeInfinity = "-inf",
          positiveInfinity = "inf",
          leftClosedBracket = "<",
          leftOpenBracket = "",
          rightClosedBracket = "",
          rightOpenBracket = ")",
          bracketLen = 0)
explanation_dataframe <- getExplanationsDataframe(rmqCBA@rules, firingRuleIDs, train, includeJustifications = TRUE, ir)
explanation_dataframe
explanation_dataframe <- getClassExplanationsDataframe(rmqCBA, data, ir)
explanation_dataframe
$benign

$malignant
NA
cba_explanation_dataframe <- getExplanationsDataframe(as.qcba.rules(rmCBA@rules), cbaFiringRuleIDs, train, includeJustifications = TRUE, ir)
cba_explanation_dataframe
cba_explanation_dataframe <- getClassExplanationsDataframe(rmCBA, train, ir)
cba_explanation_dataframe[["benign"]]
LS0tDQp0aXRsZTogImFydWxlc0V4cGxhbmF0aW9uIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KYGBge3J9DQpsaWJyYXJ5KHJKYXZhKQ0KbGlicmFyeShxQ0JBKQ0KbGlicmFyeShhcmMpDQoNCi5qaW5pdCgpDQpgYGANCg0KIyBEYXRhIHByZXBhcmF0aW9uDQoNCmBgYHtyfQ0KZGF0YSA8LSByZWFkLmNzdigiLi4vZGF0YS9icmVhc3QtdzAuY3N2IikNCg0KZGF0YVsxOjUsXQ0KYGBgDQoNCmBgYHtyfQ0Kc21wX3NpemUgPC0gZmxvb3IoMSAqIG5yb3coZGF0YSkpDQpzZXQuc2VlZCgxMjMpDQoNCnRyYWluX2luZCA8LSBzYW1wbGUoc2VxX2xlbihucm93KGRhdGEpKSwgc2l6ZSA9IHNtcF9zaXplKQ0KDQp0cmFpbiA8LSBkYXRhW3RyYWluX2luZCwgXQ0KdGVzdCA8LSBkYXRhW3RyYWluX2luZCwgXQ0KYGBgDQoNCg0KIyBNb2RlbCB0cmFpbmluZw0KDQojIyBDQkENCg0KYGBge3J9DQpybUNCQSA8LSBjYmEodHJhaW4sIGNsYXNzQXR0PWNvbG5hbWVzKGRhdGEpW2xlbmd0aChjb2xuYW1lcyhkYXRhKSldKQ0KYGBgDQoNCg0KDQpgYGB7cn0NCmluc3BlY3Qocm1DQkFAcnVsZXNbMTo1XSkNCmBgYA0KDQojIyBRQ0JBDQoNCmBgYHtyfQ0Kcm1xQ0JBIDwtIHFjYmEoY2JhUnVsZU1vZGVsPXJtQ0JBLGRhdGFkZj10cmFpbikNCg0Kcm1xQ0JBQHJ1bGVzWzE6MTAsXQ0KYGBgDQoNCiMgRGF0YSBzdHJ1Y3R1cmUgY29udmVyc2lvbg0KDQojIyBDb252ZXJzaW9uIHRvIGFydWxlcw0KDQpgYGB7cn0NCml0ZW1NYXRyaXhSdWxlcyA8LSBhcy5pdGVtLm1hdHJpeChybXFDQkEsIHRyYWluKQ0KDQppbnNwZWN0KGl0ZW1NYXRyaXhSdWxlc1sxOjUsXSkNCmBgYA0KDQojIyBDb252ZXJzaW9uIHRvIHFjYmEgZGF0YSBzdHJ1Y3R1cmUNCg0KYGBge3J9DQpxY2JhUnVsZXMgPC0gYXMucWNiYS5ydWxlcyhpdGVtTWF0cml4UnVsZXMpDQoNCnFjYmFSdWxlc1sxOjEwLF0NCg0KYGBgDQoNCg0KT3ZlcndyaXRpbmcgdGhlIFFDQkEgb2JqZWN0IHNsb3Qgd2l0aCB0aGUgbmV3IHJ1bGVzIGFuZCBjb252ZXJ0aW5nIGJhY2sgdG8gYXJ1bGVzIGl0ZW1NYXRyaXguDQpgYGB7cn0NCnJtcUNCQUBydWxlcyA8LSBxY2JhUnVsZXMNCg0KaXRlbU1hdHJpeFJ1bGVzMiA8LSBhcy5pdGVtLm1hdHJpeChybXFDQkEsIHRyYWluKQ0KDQppbnNwZWN0KGl0ZW1NYXRyaXhSdWxlczJbMToxMF0pDQpgYGANCg0KIyBBcnVsZXMgcGFja2FnZXMgaW50ZXJvcGVyYWJpbGl0eQ0KYGBge3J9DQpsaWJyYXJ5KGFydWxlc1ZpeikNCg0KaXRlbU1hdHJpeFJ1bGVzIDwtIGFzLml0ZW0ubWF0cml4KHJtcUNCQSwgdHJhaW4pDQpwbG90KGl0ZW1NYXRyaXhSdWxlcykNCmBgYA0KDQpgYGB7cn0NCnBsb3RseV9hcnVsZXMoaXRlbU1hdHJpeFJ1bGVzKQ0KYGBgDQoNCmBgYHtyfQ0KaW5zcGVjdERUKGl0ZW1NYXRyaXhSdWxlczIpDQpgYGANCg0KDQojIEV4cGxhbmF0aW9ucw0KYGBge3J9DQpjYmFGaXJpbmdSdWxlSURzIDwtIGV4cGxhaW5QcmVkaWN0aW9uLkNCQVJ1bGVNb2RlbChybUNCQSwgdHJhaW4pDQpjYmFGaXJpbmdSdWxlcyA8LSBhcy5xY2JhLnJ1bGVzKHJtQ0JBQHJ1bGVzKVtjYmFGaXJpbmdSdWxlSURzLF0NCg0KIyBleHBsYW5hdGlvbiBkZW1vDQpmaXJpbmdSdWxlSURzIDwtIHByZWRpY3Qocm1xQ0JBLHRlc3Qsb3V0cHV0RmlyaW5nUnVsZUlEcz1UUlVFKQ0KZmlyaW5nUnVsZXMgPC0gcm1xQ0JBQHJ1bGVzW2ZpcmluZ1J1bGVJRHMsXQ0KDQppciA8LSBuZXcoImludGVydmFsUmVhZGVyIiwNCiAgICAgICAgICBudW1iZXJTZXBhcmF0b3IgPSAiX3RvXyIsDQogICAgICAgICAgbmVnYXRpdmVJbmZpbml0eSA9ICItaW5mIiwNCiAgICAgICAgICBwb3NpdGl2ZUluZmluaXR5ID0gImluZiIsDQogICAgICAgICAgbGVmdENsb3NlZEJyYWNrZXQgPSAiPCIsDQogICAgICAgICAgbGVmdE9wZW5CcmFja2V0ID0gIiIsDQogICAgICAgICAgcmlnaHRDbG9zZWRCcmFja2V0ID0gIiIsDQogICAgICAgICAgcmlnaHRPcGVuQnJhY2tldCA9ICIpIiwNCiAgICAgICAgICBicmFja2V0TGVuID0gMCkNCg0KZXhwbGFuYXRpb25fZGF0YWZyYW1lIDwtIGdldEV4cGxhbmF0aW9uc0RhdGFmcmFtZShybXFDQkFAcnVsZXMsIGZpcmluZ1J1bGVJRHMsIHRyYWluLCBpbmNsdWRlSnVzdGlmaWNhdGlvbnMgPSBUUlVFLCBpcikNCg0KZXhwbGFuYXRpb25fZGF0YWZyYW1lDQpgYGANCg0KYGBge3J9DQpleHBsYW5hdGlvbl9kYXRhZnJhbWUgPC0gZ2V0Q2xhc3NFeHBsYW5hdGlvbnNEYXRhZnJhbWUocm1xQ0JBLCBkYXRhLCBpcikNCmV4cGxhbmF0aW9uX2RhdGFmcmFtZQ0KYGBgDQpgYGB7cn0NCmNiYV9leHBsYW5hdGlvbl9kYXRhZnJhbWUgPC0gZ2V0RXhwbGFuYXRpb25zRGF0YWZyYW1lKGFzLnFjYmEucnVsZXMocm1DQkFAcnVsZXMpLCBjYmFGaXJpbmdSdWxlSURzLCB0cmFpbiwgaW5jbHVkZUp1c3RpZmljYXRpb25zID0gVFJVRSwgaXIpDQoNCmNiYV9leHBsYW5hdGlvbl9kYXRhZnJhbWUNCmBgYA0KDQpgYGB7cn0NCmNiYV9leHBsYW5hdGlvbl9kYXRhZnJhbWUgPC0gZ2V0Q2xhc3NFeHBsYW5hdGlvbnNEYXRhZnJhbWUocm1DQkEsIHRyYWluLCBpcikNCg0KY2JhX2V4cGxhbmF0aW9uX2RhdGFmcmFtZVtbImJlbmlnbiJdXQ0KYGBgDQoNCg==